home *** CD-ROM | disk | FTP | other *** search
- unit phon1;
-
- interface
-
- uses
- Windows, Messages, SysUtils, Classes, Graphics, Controls, Forms, Dialogs,
- StdCtrls, Db, DBTables;
-
- type
- TForm1 = class(TForm)
- Memo1: TMemo;
- procedure FormCreate(Sender: TObject);
- private
- public
- procedure ShowValue(aKey: string);
- function Soundex(aKey: string): string;
- function Soundex2(aKey: string): string;
- function Metaphone(aKey: string): string;
- end;
-
- var
- Form1: TForm1;
-
- implementation
-
- {$R *.DFM}
-
- function TForm1.Soundex(aKey: string): string;
- const {ABCDEFGHIJKLMNOPQRSTUVWXYZ}
- LetterCodes = '01230120022455012623010202';
- MaxCodeLength = 4;
- var
- I: Integer;
- Ch: Char;
- LastCh: Char;
- begin
- Result := '';
- LastCh := #0;
- I := 1;
- while Length(Result) <> MaxCodeLength do begin
- if I > Length(aKey) then
- Result := Result + '0'
- else begin
- Ch := UpCase(aKey[I]);
- if Ch in ['A'..'Z'] then
- if Length(Result) = 0 then
- Result := Ch
- else begin
- Ch := LetterCodes[Ord(Ch) - 64];
- if (Ch <> '0') and (Ch <> LastCh) then begin
- Result := Result + Ch;
- LastCh := Ch;
- end;
- end;
- Inc(I);
- end;
- end;
- end;
-
- function TForm1.Soundex2(aKey: string): string;
- { This is an "improved" Soundex algorithm from Joe Celko's "SQL for Smarties",
- 1995, Morgan Kaufman Publishers, Inc, pp 85-87. }
- const
- Vowels = ['A', 'E', 'I', 'O', 'U'];
- var
- I: Integer;
- Ch: Char;
-
- function TransformPrefix(aFind, aReplace: string): Boolean;
- begin
- Result := CompareText(aFind, aKey) = 0;
- if Result then
- Move(aReplace[1], aKey[1], Length(aReplace));
- end;
-
- procedure Transform(aFind, aReplace: string);
- var
- I, J: Integer;
- Match: Boolean;
- begin
- for I := 2 to Length(aKey) - Length(aFind) + 1 do begin
- Match := True;
- for J := 1 to Length(aFind) do
- if aKey[I] <> aFind[J] then begin
- Match := False;
- Break;
- end;
- if Match then
- Move(aReplace[1], aKey[I], Length(aReplace));
- end;
- end;
- begin
-
- { first pass: convert to upper, strip non letters, reduce vowels }
- I := 1;
- while I <= Length(aKey) do begin
- Ch := UpCase(aKey[I]);
- if Ch in ['A'..'Z'] then begin
- if Ch in Vowels then
- Ch := 'A';
- aKey[I] := Ch;
- Inc(I);
- end
- else
- Delete(aKey, I, 1);
- end;
-
- { Transform prefixes }
- if not TransformPrefix('MAC', 'MCC') then
- if not TransformPrefix('KN', 'NN') then
- if not TransformPrefix('K', 'C') then
- if not TransformPrefix('PF', 'FF') then
- if not TransformPrefix('SCH', 'SSS') then
- TransformPrefix('PH', 'FF');
-
- { Transform letter combinations }
- Transform('DG', 'GG');
- Transform('CAAN', 'TAAN');
- Transform('D', 'T');
- Transform('NST', 'NSS');
- Transform('AV', 'AF');
- Transform('Q', 'G');
- Transform('Z', 'S');
- Transform('M', 'N');
- Transform('KN', 'NN');
- Transform('K', 'C');
-
- { Replace H with A unless it is surrounded by A }
- I := 2;
- while I <= Length(aKey) do begin
- if aKey[I] = 'H' then
- if aKey[I - 1] <> 'A' then
- if (I < Length(aKey)) and (aKey[I + 1] <> 'A') then
- aKey[I] := 'H'
- else
- Inc(I); { we know it's A so no need to check for H }
- Inc(I);
- end;
-
- { Replace AW with A }
- I := 2;
- while I < Length(aKey) do begin
- if (aKey[I] = 'A') and (aKey[I + 1] = 'W') then
- Delete(aKey, I + 1, 1);
- Inc(I);
- end;
-
- { Transforms }
- Transform('PH', 'FF');
- Transform('SCH', 'SSS');
-
- { Drop trailing A or S chars }
- I := Length(aKey);
- while (I > 0) and (aKey[I] in ['A', 'S']) do begin
- Delete(aKey, I, 1);
- Dec(I);
- end;
-
- { Transform trailing NT with TT }
- if Copy(aKey, Length(aKey) - 1, 2) = 'NT' then
- aKey := Copy(aKey, 1, Length(aKey) - 2) + 'TT';
-
- { Strip out all but leading A's }
- I := 2;
- while I <= Length(aKey) do
- if aKey[I] = 'A' then
- Delete(aKey, I, 1)
- else
- Inc(I);
-
- { Collapse repeating codes }
- I := 2;
- while I <= Length(aKey) do
- if aKey[I] = aKey[I - 1] then
- Delete(aKey, I, 1)
- else
- Inc(I);
-
- if Length(aKey) < 4 then
- aKey := aKey + StringofChar(' ', 4 - Length(aKey));
-
- Result := aKey;
- end;
-
- function TForm1.Metaphone(aKey: string): string;
- const
- MaxCodeLength = 6;
- VowelSet = ['A', 'E', 'I', 'O', 'U'];
- NonTransformSet = ['F', 'J', 'L', 'M', 'N', 'R'];
- EIYSet = ['E', 'I', 'Y'];
- var
- Ch: Char;
- I: Integer;
- KeyBuffer: array[0..256] of Char;
- KeyBufLen: Integer; { Number of chars in buffer }
- Key: PChar; { Pointer to start of string }
- LastCharPos: Integer; { Position of last char in the buffer }
- begin
- Result := '';
-
- { Retain uppercase alpha characters in buffer; the buffer will
- always have at least one #0 placeholder before and after the
- keyword. This avoids the need to check length bounds when
- comparing previous or next letters. }
- FillChar(KeyBuffer, SizeOf(KeyBuffer), #0);
- Key := @KeyBuffer[1];
- KeyBufLen := 0;
- for I := 1 to Length(aKey) do begin
- Ch := UpCase(aKey[I]);
- if Ch in ['A'..'Z'] then begin
- Key[KeyBufLen] := Ch;
- Inc(KeyBufLen);
- end;
- end;
- LastCharPos := KeyBufLen - 1;
-
- { Transform prefixes }
- if CompareMem(Key, PChar('GN'), 2) or
- CompareMem(Key, PChar('KN'), 2) or
- CompareMem(Key, PChar('PN'), 2) or
- CompareMem(Key, PChar('AE'), 2) or
- CompareMem(Key, PChar('WH'), 2) or
- CompareMem(Key, PChar('WR'), 2) then
- Inc(Key)
- else if Key[0] = 'X' then
- Key[0] := 'S';
-
- for I := 0 to LastCharPos do begin
-
- { Skip duplicating letters except for C }
- if (Key[I - 1] = Key[I]) and (Key[I] <> 'C') then
- Continue;
-
- { Retain nontransform letters }
- if (Key[I] in NonTransformSet) or
- ((I = 0) and (Key[I] in VowelSet)) then begin
- Result := Result + Key[I];
- Continue;
- end;
-
- { Apply transforms }
- case Key[I] of
- 'B': { retain unless within -MB }
- if not ((I = LastCharPos) and (Key[I - 1] = 'M')) then
- Result := Result + 'B';
- 'C': { drop if in -SCI-, -SCE- or -SCY- }
- if not ((Key[I - 1] = 'S') and (Key[I + 1] in EIYSet)) then
- { map to X if in -CIA- or -CH-}
- if ((Key[I + 1] = 'I') and (Key[I + 2] = 'A')) or
- (Key[I + 1] = 'H') then
- Result := Result + 'X'
- else
- { map to S if in -CE-, -CI- or -CY- }
- if Key[I + 1] in EIYSet then
- Result := Result + 'S'
- else { otherwise K }
- Result := Result + 'K';
- 'D': { map to J if in -DGE-, -DGI- or -DGY- }
- if (Key[I + 1] = 'G') and (Key[I + 2] in EIYSet) then
- Result := Result + 'J'
- else { otherwise T }
- Result := Result + 'T';
- 'G': { map to J if in -GE-, -GI, -GY and not GG }
- if (Key[I + 1] in EIYSet) and (Key[I - 1] <> 'G') then
- Result := Result + 'J'
- else
- { drop if in -GH- but not at end or before a vowel }
- if not ((Key[I + 1] = 'H') and (I <> LastCharPos - 1) and not (Key[I + 2] in VowelSet)) or
- { drop if in -GNED }
- ((I = LastCharPos - 3) and CompareMem(@Key[I + 1], PChar('NED'), 3)) or
- { drop if in -GN }
- ((I = LastCHarPos - 1) and (Key[I + 1] = 'N')) or
- { drop if in -DGE-, -DGI- or -DGY- }
- ((Key[I - 1] = 'D') and (Key[I + 1] in EIYSet)) then
- { otherwise K }
- Result := Result + 'K';
- 'H': { retain if before a vowel and not after C, G, P, S or T }
- if (Key[I + 1] in VowelSet) and
- not (Key[I - 1] in ['C', 'G', 'P', 'S', 'T']) then
- Result := Result + 'H';
- 'K': { retain unless after C }
- if Key[I - 1] <> 'C' then
- Result := Result + 'K';
- 'P': { map to F if before H }
- if Key[I + 1] = 'H' then
- Result := Result + 'F'
- else { otherwise P }
- Result := Result + 'P';
- 'Q': { map to K }
- Result := Result + 'K';
- 'S': { map to X if in -SH-, -SIO- or -SIA- }
- if (Key[I + 1] = 'H') or
- ((Key[I + 1] = 'I') and (Key[I + 2] in ['O', 'A'])) then
- Result := Result + 'X'
- else { otherwise S }
- Result := Result + 'S';
- 'T': { map to X if in -TIA- or -TIO- }
- if (Key[I + 1] = 'I') and (Key[I + 2] in ['O', 'A']) then
- Result := Result + 'X'
- else
- { map to 0 (zero) if before H }
- if Key[I + 1] = 'H' then
- Result := Result + '0'
- else
- { drop if in -TCH- }
- if not ((Key[I + 1] = 'C') and (Key[I + 2] = 'H')) then
- { otherwise T }
- Result := Result + 'T';
- 'V': { map to F }
- Result := Result + 'F';
- 'W',
- 'Y': { retain if after a vowel }
- if Key[I - 1] in VowelSet then
- Result := Result + Key[I];
- 'X': { map to KS }
- Result := Result + 'KS';
- 'Z': { map to S }
- Result := Result + 'S';
- end;
-
- { terminate if max code length is reached }
- if Length(Result) = MaxCodeLength then
- Break;
- end;
- end;
-
- procedure TForm1.ShowValue(aKey: string);
- begin
- with Memo1.Lines do
- Add(Format('%-15s %-10s %-10s',
- [aKey, Soundex(aKey), Metaphone(aKey)]));
- end;
-
- procedure TForm1.FormCreate(Sender: TObject);
- begin
- with Memo1.Lines do begin
- Clear;
- Add(Format('%-15s %-10s %-10s',
- ['NAME', 'SOUNDEX', 'METAPHONE']));
- end;
-
- ShowValue('Smith');
- ShowValue('Smythe');
- ShowValue('Smit');
- ShowValue('Dickson');
- ShowValue('Dixon');
- ShowValue('Troxell');
- ShowValue('Troxel');
- ShowValue('Troxwell');
- ShowValue('Traxell');
- ShowValue('Traxel');
- ShowValue('Trachsel');
- ShowValue('Troyell');
- ShowValue('Troyel');
- ShowValue('Trovell');
- ShowValue('Trovel');
- ShowValue('Tropical');
- end;
-
- end.
-